{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import torch\n",
    "from transformer.modeling import TinyBertForPreTraining, BertForMaskedLM, TinyBertForPreTraining\n",
    "import bert.modeling, bert.tokenization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "pt_model = TinyBertForPreTraining.from_pretrained('./tiny-bert-bahasa-cased-combined')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "sess = tf.InteractiveSession()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"vocab_size\": 32000, \"hidden_size\": 312, \"num_hidden_layers\": 4, \"num_attention_heads\": 12, \"hidden_act\": \"gelu\", \"intermediate_size\": 1200, \"hidden_dropout_prob\": 0.1, \"attention_probs_dropout_prob\": 0.1, \"max_position_embeddings\": 512, \"type_vocab_size\": 2, \"initializer_range\": 0.02, \"pre_trained\": \"\", \"training\": \"\", \"cell\": {}, \"emb_size\": 312, \"structure\": []}'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "config = bert.modeling.BertConfig.from_dict(pt_model.config.to_dict())\n",
    "json.dumps(config.__dict__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/bert/modeling.py:171: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/bert/modeling.py:409: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/bert/modeling.py:490: The name tf.assert_less_equal is deprecated. Please use tf.compat.v1.assert_less_equal instead.\n",
      "\n",
      "WARNING:tensorflow:\n",
      "The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
      "For more information, please see:\n",
      "  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
      "  * https://github.com/tensorflow/addons\n",
      "  * https://github.com/tensorflow/io (for I/O related ops)\n",
      "If you depend on functionality not listed there, please file an issue.\n",
      "\n",
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/bert/modeling.py:671: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use keras.layers.Dense instead.\n",
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/tensorflow_core/python/layers/core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `layer.__call__` method instead.\n"
     ]
    }
   ],
   "source": [
    "input_ids = tf.placeholder(tf.int32, [None, None])\n",
    "input_mask = tf.placeholder(tf.int32, [None, None])\n",
    "token_type_ids = tf.placeholder(tf.int32, [None, None])\n",
    "model = bert.modeling.BertModel(config=config, is_training=False,\n",
    "                                input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids)\n",
    "\n",
    "output_layer = model.get_sequence_output()\n",
    "embedding = model.get_embedding_table()\n",
    "\n",
    "with tf.variable_scope('cls/predictions'):\n",
    "    with tf.variable_scope('transform'):\n",
    "        input_tensor = tf.layers.dense(\n",
    "            output_layer,\n",
    "            units = config.hidden_size,\n",
    "            activation = bert.modeling.get_activation(config.hidden_act),\n",
    "            kernel_initializer = bert.modeling.create_initializer(\n",
    "                config.initializer_range\n",
    "            ),\n",
    "        )\n",
    "        input_tensor = bert.modeling.layer_norm(input_tensor)\n",
    "\n",
    "    output_bias = tf.get_variable(\n",
    "    'output_bias',\n",
    "    shape = [config.vocab_size],\n",
    "    initializer = tf.zeros_initializer(),\n",
    "    )\n",
    "    logits = tf.matmul(input_tensor, embedding, transpose_b = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sess.run(model.embedding_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From <ipython-input-10-e03b5095ec85>:47: Variable.load (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Prefer Variable.assign which has equivalent behavior in 2.X.\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "\n",
    "bert_variables = [v for v in tf.get_collection('variables') if 'bert' in v.name or 'cls' in v.name]\n",
    "tf.variables_initializer(bert_variables).run()\n",
    "\n",
    "# Based on: convert_tf_checkpoint_to_pytorch.py from pytorch-pretrained-BERT\n",
    "for variable in bert_variables:\n",
    "    name = variable.name.split(':')[0]\n",
    "    name = name.split('/')\n",
    "    array = variable.eval()\n",
    "    # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v\n",
    "    # which are not required for using pretrained model\n",
    "    if any(n in [\"adam_v\", \"adam_m\"] for n in name):\n",
    "        print(\"Skipping {}\".format(\"/\".join(name)))\n",
    "        continue\n",
    "    pytorch_var = pt_model\n",
    "    for m_name in name:\n",
    "        if re.fullmatch(r'[A-Za-z]+_\\d+', m_name):\n",
    "            l = re.split(r'_(\\d+)', m_name)\n",
    "        else:\n",
    "            l = [m_name]\n",
    "        if l[0] == 'kernel' or l[0] == 'gamma':\n",
    "            pytorch_var = getattr(pytorch_var, 'weight')\n",
    "        elif l[0] == 'output_bias' or l[0] == 'beta':\n",
    "            pytorch_var = getattr(pytorch_var, 'bias')\n",
    "        elif l[0] == 'output_weights':\n",
    "            pytorch_var = getattr(pytorch_var, 'weight')\n",
    "        elif l[0] == 'cls':\n",
    "            pytorch_var = getattr(pytorch_var, 'cls')\n",
    "        else:\n",
    "            pytorch_var = getattr(pytorch_var, l[0])\n",
    "        if len(l) >= 2:\n",
    "            num = int(l[1])\n",
    "            pytorch_var = pytorch_var[num]\n",
    "    if m_name[-11:] == '_embeddings':\n",
    "        pytorch_var = getattr(pytorch_var, 'weight')\n",
    "    elif m_name == 'kernel':\n",
    "        pytorch_var = pytorch_var.t()\n",
    "    try:\n",
    "        assert pytorch_var.shape == array.shape\n",
    "    except AssertionError as e:\n",
    "        print(e)\n",
    "        e.args += (pytorch_var.shape, array.shape)\n",
    "        raise\n",
    "\n",
    "    # print(\"Extracting PyTorch weight {}\".format(name))\n",
    "    variable.load(pytorch_var.detach().cpu().numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<tf.Variable 'bert/embeddings/word_embeddings:0' shape=(32000, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/embeddings/token_type_embeddings:0' shape=(2, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/embeddings/position_embeddings:0' shape=(512, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/embeddings/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/embeddings/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/query/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/query/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/key/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/key/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/value/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/value/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/output/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/intermediate/dense/kernel:0' shape=(312, 1200) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/intermediate/dense/bias:0' shape=(1200,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/output/dense/kernel:0' shape=(1200, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/query/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/query/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/key/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/key/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/value/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/value/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/output/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/intermediate/dense/kernel:0' shape=(312, 1200) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/intermediate/dense/bias:0' shape=(1200,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/output/dense/kernel:0' shape=(1200, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/query/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/query/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/key/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/key/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/value/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/value/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/output/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/intermediate/dense/kernel:0' shape=(312, 1200) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/intermediate/dense/bias:0' shape=(1200,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/output/dense/kernel:0' shape=(1200, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/query/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/query/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/key/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/key/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/value/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/value/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/output/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/intermediate/dense/kernel:0' shape=(312, 1200) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/intermediate/dense/bias:0' shape=(1200,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/output/dense/kernel:0' shape=(1200, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/pooler/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/pooler/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'cls/predictions/transform/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'cls/predictions/transform/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'cls/predictions/transform/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'cls/predictions/transform/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'cls/predictions/output_bias:0' shape=(32000,) dtype=float32_ref>]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bert_variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<tf.Variable 'bert/embeddings/word_embeddings:0' shape=(32000, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/embeddings/token_type_embeddings:0' shape=(2, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/embeddings/position_embeddings:0' shape=(512, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/embeddings/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/embeddings/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/query/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/query/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/key/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/key/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/value/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/self/value/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/output/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/attention/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/intermediate/dense/kernel:0' shape=(312, 1200) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/intermediate/dense/bias:0' shape=(1200,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/output/dense/kernel:0' shape=(1200, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_0/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/query/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/query/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/key/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/key/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/value/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/self/value/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/output/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/attention/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/intermediate/dense/kernel:0' shape=(312, 1200) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/intermediate/dense/bias:0' shape=(1200,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/output/dense/kernel:0' shape=(1200, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_1/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/query/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/query/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/key/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/key/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/value/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/self/value/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/output/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/attention/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/intermediate/dense/kernel:0' shape=(312, 1200) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/intermediate/dense/bias:0' shape=(1200,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/output/dense/kernel:0' shape=(1200, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_2/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/query/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/query/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/key/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/key/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/value/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/self/value/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/output/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/attention/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/intermediate/dense/kernel:0' shape=(312, 1200) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/intermediate/dense/bias:0' shape=(1200,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/output/dense/kernel:0' shape=(1200, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/output/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/output/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/encoder/layer_3/output/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/pooler/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'bert/pooler/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'cls/predictions/transform/dense/kernel:0' shape=(312, 312) dtype=float32_ref>,\n",
       " <tf.Variable 'cls/predictions/transform/dense/bias:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'cls/predictions/transform/LayerNorm/beta:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'cls/predictions/transform/LayerNorm/gamma:0' shape=(312,) dtype=float32_ref>,\n",
       " <tf.Variable 'cls/predictions/output_bias:0' shape=(32000,) dtype=float32_ref>]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[v for v in tf.get_collection('variables') if 'bert' in v.name or 'cls' in v.name]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'bert-tiny-v1/model.ckpt'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "saver = tf.train.Saver(tf.trainable_variables())\n",
    "saver.save(sess, 'bert-tiny-v1/model.ckpt')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
